In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
`%matplotlib inline` is needed so that Seaborn plots are displayed directly below each cell without additional calls.
In [2]:
# Load the training set from train.csv in the current working directory.
train = pd.read_csv(filepath_or_buffer="train.csv")
In [3]:
# Load the test set from test.csv in the current working directory.
test = pd.read_csv(filepath_or_buffer="test.csv")
We use pandas to load the CSV files.
In [4]:
# Preview the first five rows of the training data.
train.head(n=5)
Out[4]:
In [6]:
# Bar plot of SalePrice by MSSubClass; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='MSSubClass', y='SalePrice', data=train)
Out[6]:
MSSubClass: Identifies the type of dwelling involved in the sale
In [7]:
# Bar plot of SalePrice by MSZoning; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='MSZoning', y='SalePrice', data=train)
Out[7]:
In [8]:
# Scatter plot with fitted regression line of SalePrice vs LotFrontage;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='LotFrontage', y='SalePrice', data=train, fit_reg=True)
# Note that the regression line is highly impacted by outliers
Out[8]:
In [9]:
# Scatter plot with fitted regression line of SalePrice vs LotArea;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='LotArea', y='SalePrice', data=train, fit_reg=True)
# Note that the regression line is highly impacted by outliers
Out[9]:
In [10]:
# Bar plot of SalePrice by Street; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Street', y='SalePrice', data=train)
Out[10]:
In [11]:
# Bar plot of SalePrice by Alley; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Alley', y='SalePrice', data=train)
Out[11]:
In [12]:
# Bar plot of SalePrice by LotShape; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='LotShape', y='SalePrice', data=train)
# LotShape codes:
#   Reg = Regular
#   IR1 = Slightly irregular
#   IR2 = Moderately irregular
#   IR3 = Irregular
Out[12]:
In [13]:
# Bar plot of SalePrice by LandContour; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='LandContour', y='SalePrice', data=train)
Out[13]:
In [14]:
# Bar plot of SalePrice by Utilities; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Utilities', y='SalePrice', data=train)
Out[14]:
In [15]:
# Bar plot of SalePrice by LotConfig; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='LotConfig', y='SalePrice', data=train)
Out[15]:
In [16]:
# Bar plot of SalePrice by LandSlope; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='LandSlope', y='SalePrice', data=train)
Out[16]:
In [17]:
# Bar plot of SalePrice by Neighborhood; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Neighborhood', y='SalePrice', data=train)
Out[17]:
In [18]:
# Bar plot of SalePrice by Condition1; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Condition1', y='SalePrice', data=train)
Out[18]:
In [19]:
# Bar plot of SalePrice by Condition2; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Condition2', y='SalePrice', data=train)
Out[19]:
In [20]:
# Bar plot of SalePrice by BldgType; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BldgType', y='SalePrice', data=train)
Out[20]:
In [21]:
# Bar plot of SalePrice by HouseStyle; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='HouseStyle', y='SalePrice', data=train)
Out[21]:
In [105]:
# Bar plot of SalePrice by OverallQual; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='OverallQual', y='SalePrice', data=train)
Out[105]:
In [104]:
# Bar plot of SalePrice by OverallCond; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='OverallCond', y='SalePrice', data=train)
# Author's note: there is a wide range of values for OverallCond = 5.
# NOTE(review): the original comment said this "distorts the regression line",
# but a bar plot draws no regression line -- the remark probably dates from an
# earlier lmplot version of this cell; confirm or reword.
Out[104]:
In [27]:
# Scatter plot with fitted regression line of SalePrice vs YearBuilt;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='YearBuilt', y='SalePrice', data=train, fit_reg=True)
Out[27]:
In [29]:
# Scatter plot with fitted regression line of SalePrice vs YearRemodAdd;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='YearRemodAdd', y='SalePrice', data=train, fit_reg=True)
Out[29]:
In [31]:
# Bar plot of SalePrice by RoofStyle; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='RoofStyle', y='SalePrice', data=train)
Out[31]:
In [33]:
# Bar plot of SalePrice by RoofMatl; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='RoofMatl', y='SalePrice', data=train)
Out[33]:
In [34]:
# Bar plot of SalePrice by Exterior1st; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Exterior1st', y='SalePrice', data=train)
Out[34]:
In [35]:
# Bar plot of SalePrice by Exterior2nd; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Exterior2nd', y='SalePrice', data=train)
Out[35]:
In [36]:
# Bar plot of SalePrice by MasVnrType; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='MasVnrType', y='SalePrice', data=train)
Out[36]:
In [40]:
# Scatter plot with fitted regression line of SalePrice vs MasVnrArea;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='MasVnrArea', y='SalePrice', data=train, fit_reg=True)
# The high number of cases with an area = 0 is causing bias in the regression line
Out[40]:
In [41]:
# Bar plot of SalePrice by ExterQual; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='ExterQual', y='SalePrice', data=train)
# Author's observation: exterior quality seems to be associated with higher sale price.
# NOTE(review): the original comment was truncated ("compared to..."); the
# comparison the author intended is not recoverable from the notebook.
Out[41]:
In [45]:
# Bar plot of SalePrice by ExterCond (exterior condition);
# x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='ExterCond', y='SalePrice', data=train)
Out[45]:
In [42]:
# Bar plot of SalePrice by Foundation; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Foundation', y='SalePrice', data=train)
# Poured concrete is more likely to be associated with higher sales prices
Out[42]:
In [43]:
# Bar plot of SalePrice by BsmtQual; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BsmtQual', y='SalePrice', data=train)
# BsmtQual evaluates the height of the basement.
# There is no "Po" (Poor, <70 inches) category in the plot.
# It seems that a high basement ceiling is associated with a much higher sale price.
Out[43]:
In [44]:
# Bar plot of SalePrice by BsmtCond; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BsmtCond', y='SalePrice', data=train)
# Author's note: strange that the good quality basements would be associated
# with higher Sales Price. This may suggest that Basement Condition is not as
# important.
# NOTE(review): as written the first sentence describes an unsurprising result,
# and the second sentence was cut off in the original; confirm the intended
# observation against the plot.
Out[44]:
In [46]:
# Bar plot of SalePrice by BsmtExposure; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BsmtExposure', y='SalePrice', data=train)
Out[46]:
In [47]:
# Bar plot of SalePrice by BsmtFinType1; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BsmtFinType1', y='SalePrice', data=train)
Out[47]:
In [49]:
# Scatter plot with fitted regression line of SalePrice vs BsmtFinSF1;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='BsmtFinSF1', y='SalePrice', data=train, fit_reg=True)
# Regression line is again impacted by the high number of houses with area of 0
Out[49]:
In [50]:
# Bar plot of SalePrice by BsmtFinType2; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BsmtFinType2', y='SalePrice', data=train)
Out[50]:
In [52]:
# Scatter plot with fitted regression line of SalePrice vs BsmtFinSF2;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='BsmtFinSF2', y='SalePrice', data=train, fit_reg=True)
Out[52]:
In [54]:
# Scatter plot with fitted regression line of SalePrice vs BsmtUnfSF;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='BsmtUnfSF', y='SalePrice', data=train, fit_reg=True)
Out[54]:
In [55]:
# Scatter plot with fitted regression line of SalePrice vs TotalBsmtSF;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='TotalBsmtSF', y='SalePrice', data=train, fit_reg=True)
Out[55]:
In [56]:
# Bar plot of SalePrice by Heating; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Heating', y='SalePrice', data=train)
Out[56]:
In [57]:
# Bar plot of SalePrice by HeatingQC; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='HeatingQC', y='SalePrice', data=train)
Out[57]:
In [58]:
# Bar plot of SalePrice by CentralAir; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='CentralAir', y='SalePrice', data=train)
Out[58]:
In [59]:
# Bar plot of SalePrice by Electrical; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Electrical', y='SalePrice', data=train)
Out[59]:
In [60]:
# Scatter plot with fitted regression line of SalePrice vs 1stFlrSF;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='1stFlrSF', y='SalePrice', data=train, fit_reg=True)
Out[60]:
In [61]:
# Scatter plot with fitted regression line of SalePrice vs 2ndFlrSF;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='2ndFlrSF', y='SalePrice', data=train, fit_reg=True)
Out[61]:
In [62]:
# Scatter plot with fitted regression line of SalePrice vs LowQualFinSF;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='LowQualFinSF', y='SalePrice', data=train, fit_reg=True)
Out[62]:
In [63]:
# Scatter plot with fitted regression line of SalePrice vs GrLivArea;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='GrLivArea', y='SalePrice', data=train, fit_reg=True)
Out[63]:
In [66]:
# Bar plot of SalePrice by BsmtFullBath; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BsmtFullBath', y='SalePrice', data=train)
Out[66]:
In [67]:
# Bar plot of SalePrice by BsmtHalfBath; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BsmtHalfBath', y='SalePrice', data=train)
Out[67]:
In [71]:
# Bar plot of SalePrice by BedroomAbvGr; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='BedroomAbvGr', y='SalePrice', data=train)
Out[71]:
In [72]:
# Bar plot of SalePrice by KitchenAbvGr; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='KitchenAbvGr', y='SalePrice', data=train)
Out[72]:
In [73]:
# Bar plot of SalePrice by KitchenQual; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='KitchenQual', y='SalePrice', data=train)
Out[73]:
In [75]:
# Bar plot of SalePrice by TotRmsAbvGrd; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='TotRmsAbvGrd', y='SalePrice', data=train)
Out[75]:
In [76]:
# Bar plot of SalePrice by Functional; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Functional', y='SalePrice', data=train)
Out[76]:
In [77]:
# Bar plot of SalePrice by Fireplaces; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Fireplaces', y='SalePrice', data=train)
Out[77]:
In [78]:
# Bar plot of SalePrice by FireplaceQu; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='FireplaceQu', y='SalePrice', data=train)
Out[78]:
In [79]:
# Bar plot of SalePrice by GarageType; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='GarageType', y='SalePrice', data=train)
Out[79]:
In [81]:
# Scatter plot with fitted regression line of SalePrice vs GarageYrBlt;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='GarageYrBlt', y='SalePrice', data=train, fit_reg=True)
Out[81]:
In [82]:
# Bar plot of SalePrice by GarageFinish; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='GarageFinish', y='SalePrice', data=train)
Out[82]:
In [83]:
# Bar plot of SalePrice by GarageCars; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='GarageCars', y='SalePrice', data=train)
Out[83]:
In [84]:
# Scatter plot with fitted regression line of SalePrice vs GarageArea;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='GarageArea', y='SalePrice', data=train, fit_reg=True)
Out[84]:
In [86]:
# Bar plot of SalePrice by GarageQual; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='GarageQual', y='SalePrice', data=train)
Out[86]:
In [85]:
# Bar plot of SalePrice by GarageCond; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='GarageCond', y='SalePrice', data=train)
Out[85]:
In [87]:
# Bar plot of SalePrice by PavedDrive; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='PavedDrive', y='SalePrice', data=train)
Out[87]:
In [88]:
# Scatter plot with fitted regression line of SalePrice vs WoodDeckSF;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='WoodDeckSF', y='SalePrice', data=train, fit_reg=True)
Out[88]:
In [89]:
# Scatter plot with fitted regression line of SalePrice vs OpenPorchSF;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='OpenPorchSF', y='SalePrice', data=train, fit_reg=True)
Out[89]:
In [90]:
# Scatter plot with fitted regression line of SalePrice vs EnclosedPorch;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='EnclosedPorch', y='SalePrice', data=train, fit_reg=True)
Out[90]:
In [91]:
# Scatter plot with fitted regression line of SalePrice vs 3SsnPorch;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='3SsnPorch', y='SalePrice', data=train, fit_reg=True)
Out[91]:
In [92]:
# Scatter plot with fitted regression line of SalePrice vs ScreenPorch;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='ScreenPorch', y='SalePrice', data=train, fit_reg=True)
Out[92]:
In [93]:
# Scatter plot with fitted regression line of SalePrice vs PoolArea;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='PoolArea', y='SalePrice', data=train, fit_reg=True)
Out[93]:
In [95]:
# Bar plot of SalePrice by PoolQC; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='PoolQC', y='SalePrice', data=train)
Out[95]:
In [96]:
# Bar plot of SalePrice by Fence; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='Fence', y='SalePrice', data=train)
Out[96]:
In [97]:
# Bar plot of SalePrice by MiscFeature; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='MiscFeature', y='SalePrice', data=train)
Out[97]:
In [99]:
# Scatter plot with fitted regression line of SalePrice vs MiscVal;
# x/y passed by keyword (required by seaborn >= 0.12).
sns.lmplot(x='MiscVal', y='SalePrice', data=train, fit_reg=True)
Out[99]:
In [100]:
# Bar plot of SalePrice by MoSold; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='MoSold', y='SalePrice', data=train)
Out[100]:
In [101]:
# Bar plot of SalePrice by YrSold; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='YrSold', y='SalePrice', data=train)
Out[101]:
In [102]:
# Bar plot of SalePrice by SaleType; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='SaleType', y='SalePrice', data=train)
Out[102]:
In [103]:
# Bar plot of SalePrice by SaleCondition; x/y passed by keyword (required by seaborn >= 0.12).
sns.barplot(x='SaleCondition', y='SalePrice', data=train)
Out[103]: